#Binding first run messages to toxicity scores
#Alan Yan
#2-27-2020

#environment
#NOTE: the former rm(list = ls()) call was removed. It only deletes user
#objects (attached packages and options are left untouched) and it wipes the
#workspace of anyone who sources this script; nothing below relies on a
#cleared environment because every object is assigned before it is used.

#set working directory
#The absolute path below exists only on the original author's machine. Switch
#to it when it is available; otherwise keep the current working directory so
#the relative "data/..." paths below resolve when the script is launched from
#the project root on another machine.
project_dir <- "C:/Users/Alan Yan/Dropbox/NGA/Gender Name Testing/PerspectiveAPI"
if (dir.exists(project_dir)) {
  setwd(dir = project_dir)
}

#load packages
library(pacman)
p_load(tidyverse)

#load data
#Read the three input tables (paths are relative to the working directory).
#The first line of each file is used as the header, and text columns are
#kept as character vectors instead of being converted to factors.
coded_text <- read.csv(
  file = "data/api_coded_texts.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
first_run <- read.csv(
  file = "data/messages_firstrun.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
third_run <- read.csv(
  file = "data/messages_thirdrun.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

#join
#Give both message tables the same layout before stacking them: the first
#column is renamed to the join key "id", the message_body / comment column is
#dropped, and experiment.1 records which file a row came from
#(1 = messages_firstrun, 0 = messages_thirdrun).
colnames(first_run)[1] <- "id"
first_run[["message_body"]] <- NULL
first_run[["experiment.1"]] <- 1

colnames(third_run)[1] <- "id"
third_run[["comment"]] <- NULL
third_run[["experiment.1"]] <- 0

#Stack the two runs (rbind needs both tables to have the same column names)
#and keep only the rows whose id is present in both coded_text and the
#stacked messages.
dt <- inner_join(
  x = coded_text,
  y = rbind(first_run, third_run),
  by = "id"
)

#create treatment condition variables
#Add one 0/1 indicator column per treatment condition, named after the
#condition itself: 1 where dt$group equals that label, 0 otherwise.
#Rows with a missing group get NA in every indicator.
for (condition in c("female", "no.name", "male")) {
  dt[[condition]] <- ifelse(dt$group == condition, 1, 0)
}

#export
#Write the joined table to disk. write.csv's default row.names = TRUE is
#left in place, so the file starts with an unnamed column of row names.
write.csv(x = dt, file = "data/cleaned_coded_text.csv")
